library(haven)
library(foreign)
library(readxl)
library(openxlsx)
library(car)
library(readstata13)
library(ggplot2)
library(dplyr)


########## Import survey data ########## ROUND 1 - 2013
# d <- read.dta13("~/PhD/Survey Data for Opinion_Policy Dataset/NORSK MEDBORGERPANEL/Norsk medborgerpanel runde 1-11.dta")


######################################################################
#--------------------------  VARIABLES ------------------------------# 
######################################################################

# INSERT row number of issue in excell docuemnt (next number after previous entry)
issue_row_num = 1

# INSERT a policy item variable (Values = FAV, OPP, DK) and the name of that variable 
# Recode according to rules specified in separate document
d$polpref <- car::recode(d$NAME, " ' '='FAV'; ' '='OPP'; ' '='DK'")
polpref_var_name <- "NAME"

# INSERT Gender variable (1=MALE, 2=FEMALE)
d$gender <- car::recode(d$w03_P1, "'Mann'='MALE'; 'Kvinne'='FEMALE'; else=NA")
table(d$gender)

# INSERT Age group variable (categorical, X number of groups, ordered youngest to oldest (1,2,...X), watch out for DK)
#d$age <- factor(d$alder, labels=c("1","2","3","4","5","6","7","8","9","10","11","12"))
#table(d$age)
# OR 
# INSERT Numeric AGE variable. Turns it into 10 equal-N categories
d$age <- ifelse(!is.na(d$w03_P5_1), d$w03_P5_1, NA)
d$age <- 8-d$age

# INSERT Eduation group variable (categorical, X number of groups, ordered lowest to highest (1,2,...X), watch out for DK)
d$edu <- ifelse(!is.na(d$w03_P4_2), d$w03_P4_2, NA)
d$edu <- car::recode(d$edu, "1=1; 2=2; 3:4=3; 5:6=4; 7:9=5; 10:11=6; 12:13=7; 14=8; else=NA")
table(d$edu)

# HOUSEHOLD income. Special for Round 3  
d$hincW1 <- d$w03_r3k25
d$hincW1 <- ifelse(d$hincW1=="Ikke svart"|d$hincW1=="Ikke spurt", NA, d$hincW1)
d$hincW3 <- d$w01_k25_1
d$hincW3[d$hincW3==97] <- NA
d$hincW3 <- car::recode(d$hincW3, "lo:149999=1; 150000:300000=2; 300001:400000=3; 400001:500000=4; 500001:600000=5; 600001:700000=6; 700001:1000000=7; 1000001:hi=8; else=NA")
table(d$hincW1)
table(d$hincW3)

d$hincome <- NA
d$hincome[d$hincW1==1|d$hincW3==1] <- 1
d$hincome[d$hincW1==2|d$hincW3==2] <- 2
d$hincome[d$hincW1==3|d$hincW3==3] <- 3
d$hincome[d$hincW1==4|d$hincW3==4] <- 4
d$hincome[d$hincW1==5|d$hincW3==5] <- 5
d$hincome[d$hincW1==6|d$hincW3==6] <- 6
d$hincome[d$hincW1==7|d$hincW3==7] <- 7
d$hincome[d$hincW1==8|d$hincW3==8] <- 8
table(d$hincome)

# INSERT Region variable
# d$region <- d$v475
# INSERT Fylke variable, NB: HEDMARK, ?ST-AGDER
d$fylke <- car::recode(d$w03_P3, "'Aust-Agder'='?st-Agder'; 'Hedemark'='Hedmark'; 'Sogn og fjordane'='Sogn og Fjordane'; 'Nord-tr?ndelag'='Nord-Tr?ndelag'; 'Vest-agder'='Vest-Agder'; 'M?re og romsdal'='M?re og Romsdal'; 'Finmark'='Finnmark';")  
table(d$fylke)

# INSERT Party variable (What will they vote if election tomorrow) (Cateogrical, no order, but use same abbreviatons as in master data)
d$party <- car::recode(d$w03_r3k3_2,"
                       'Arbeiderpartiet'='AP'; 'Det norske Arbeiderparti'='AP'; 'Ap'='AP';'DNA'='AP'; 
                       'H?yre'='H'; 'H0yre'='H'; 'H'='H';
                       'AKP (m-l)'='AKP'; 'AKP (Arbeidernes Kommunistparti (m-l)'='AKP'; 
                       'Kommunistene (NKP)'='NKP'; 'NKP'='NKP'; 
                       'Kristelig Folkeparti'='KRF'; 'KrF'='KRF'; 
                       'Senterpartiet'='SP'; 'Sp'='SP'; 
                       'Sosialistisk Folkeparti'='SF'; 'Sosialistisk Folkeparti eller Sosialistisk Valgforbund'='SF'; 
                       'Sosialistisk Venstreparti'='SV'; 'SV'='SV'; 
                       'Venstre'='V'; 'V'='V';
                       'Det nye Folkeparti'='DNF'; 'Det nye Folkepartiet'='DNF'; 
                       'DLF'='DLF'; 
                       'Anders Langes Parti'='ALP';
                       'Fremskrittspartiet'='FRP'; 'FrP'='FRP'; 
                       'Kystpartiet'='KY'; 
                       'R?d Valgallianse'='RV'; 'RV'='RV'; 
                       'R?dt'='R';
                       'Milj?partiet De Gr?nne'='MDG'; 
                       'Hadde ikke stemmerett'='NOVOTE'; 'Hadde ikke stemmerett'='NOVOTE'; 'Hadde ikke stemmeret'='NOVOTE'; 'Stemte ikke'='NOVOTE'; 
                       else=NA")  
table(d$party)

d$party1 <- ifelse(d$w01_k2=="Nei", "NOVOTE", d$w01_k3)
d$party1 <- car::recode(d$party1, "9='R'; 5='SV'; 8='AP'; 7='MDG'; 4='V'; 1='KRF'; 6='SP'; 2='H'; 3='FRP'; 'NOVOTE'='NOVOTE'; else=NA")
table(d$party1)

d$party2 <- NA
d$party2[d$party=="R"|d$party1=="R"] <- "R"
d$party2[d$party=="SV"|d$party1=="SV"] <- "SV"
d$party2[d$party=="AP"|d$party1=="AP"] <- "AP"
d$party2[d$party=="SP"|d$party1=="SP"] <- "SP"
d$party2[d$party=="MDG"|d$party1=="MDG"] <- "MDG"
d$party2[d$party=="KRF"|d$party1=="KRF"] <- "KRF"
d$party2[d$party=="V"|d$party1=="V"] <- "V"
d$party2[d$party=="H"|d$party1=="H"] <- "H"
d$party2[d$party=="FRP"|d$party1=="FRP"] <- "FRP"
d$party2[d$party=="NOVOTE"|d$party1=="NOVOTE"] <- "NOVOTE"
table(d$party2)
d$party <- d$party2


######################################################################
#---------------------------- START ---------------------------------# 
######################################################################

# Age distribution 
age_dist <- as.data.frame(table(d$age, d$polpref))
age_dist$name <- paste("AGE", age_dist$Var1, sep ="")
age_dist$name <- paste(age_dist$name, age_dist$Var2, sep = "_")
age_dist <- age_dist %>% select(name, Freq)

# Education distribution
edu_dist <- as.data.frame(table(d$edu, d$polpref))
edu_dist$name <- paste("EDU", edu_dist$Var1, sep ="")
edu_dist$name <- paste(edu_dist$name, edu_dist$Var2, sep = "_")
edu_dist <- edu_dist %>% select(name, Freq)

# Income HOUSHOLD distribution
hinc_dist <- as.data.frame(table(d$hincome, d$polpref))
hinc_dist$name <- paste("HINC", hinc_dist$Var1, sep ="")
hinc_dist$name <- paste(hinc_dist$name, hinc_dist$Var2, sep = "_")
hinc_dist <- hinc_dist %>% select(name, Freq)

# Party distribution
p_dist <- as.data.frame(table(d$party, d$polpref))
p_dist$name <- paste("P", p_dist$Var1, p_dist$Var2, sep ="_")
p_dist <- p_dist %>% select(name, Freq)

# Region distribution
#rg_dist <- as.data.frame(table(d$region, d$polpref))
#rg_dist$name <- paste("RG", rg_dist$Var1, rg_dist$Var2, sep ="_")
#rg_dist <- rg_dist %>% select(name, Freq)

# Fylke distribution
fy_dist <- as.data.frame(table(d$fylke, d$polpref))
fy_dist$name <- paste("FY", fy_dist$Var1, fy_dist$Var2, sep ="_")
fy_dist <- fy_dist %>% select(name, Freq)

# Gender
g_dist <- as.data.frame(table(d$gender, d$polpref))
g_dist$name <- paste(g_dist$Var1, g_dist$Var2, sep ="_")
g_dist <- g_dist %>% select(name, Freq)

# Overall
o_dist <- as.data.frame(table(d$polpref))
o_dist$name <- paste("OVERALL", o_dist$Var1, sep ="_")
o_dist <- o_dist %>% select(name, Freq)

###### Single variable distributions: 
# Single Age distribution 
age_dist_s <- as.data.frame(table(d$age))
age_dist_s$name <- paste("S_AGE", age_dist_s$Var1, sep ="")
age_dist_s <- age_dist_s %>% select(name, Freq)

# Single Education distribution
edu_dist_s <- as.data.frame(table(d$edu))
edu_dist_s$name <- paste("S_EDU", edu_dist_s$Var1, sep ="")
edu_dist_s <- edu_dist_s %>% select(name, Freq)

# Single Income HOUSHOLD distribution
hinc_dist_s <- as.data.frame(table(d$hincome))
hinc_dist_s$name <- paste("S_HINC", hinc_dist_s$Var1, sep ="")
hinc_dist_s <- hinc_dist_s %>% select(name, Freq)

# Rbind the data frames
m1 <- rbind(age_dist, edu_dist, hinc_dist, p_dist, age_dist_s, edu_dist_s, hinc_dist_s, fy_dist, o_dist, g_dist) 
m2 <- data.frame(t(m1))
colnames(m2) <- as.character(unlist(m2[1,]))
m2 = m2[-1, ]
m2$ISSUE <- issue_row_num
m2 <- mutate_all(m2, function(x) as.numeric(as.character(x)))

m2$ORG_VARNAME <- polpref_var_name
m2$ORG_DATASET_N <- nrow(d)

# Rbind with master data 
master_data <- read_excel("~/PhD/Dataset/PhD Dataset okt 3.xlsx")
rbind.all.columns <- function(x, y) {
  x.diff <- setdiff(colnames(x), colnames(y))
  y.diff <- setdiff(colnames(y), colnames(x))
  
  x[, c(as.character(y.diff))] <- NA
  
  y[, c(as.character(x.diff))] <- NA
  
  return(rbind(x, y))
}
master_data <- rbind.all.columns(master_data, m2)
master_data <- master_data %>% arrange(ISSUE)
write.xlsx(master_data, '~/PhD/Dataset/PhD Dataset okt 3.xlsx')

table(d$polpref,d$fylke)
table(d$polpref,d$age)
table(d$polpref,d$gender)
table(d$polpref,d$hincome)
table(d$polpref,d$edu)
prop.table(table(d$polpref,d$hincome),2)

######################################################################
#----------------------------- END ----------------------------------# 
######################################################################